knitr::opts_chunk$set(fig.width=9, fig.height=8 #,results="asis" ,echo = T ,message = F ,warning = F ) # ws --------------------------------------------------------------------------- rm(list = ls()) require(tidyverse) require(sf) # option setting sf_use_s2(T) options(tigris_use_cache = TRUE) # dropbox dir or della base dir ddir <- Sys.getenv('drop_dir') # '/scratch/gpfs/km31/' devtools::load_all()
# sample areas ----------------------------------------------------------------- library(geox) arrs <- geox::rpops %>% filter(pop > 50e3 ,rt == 'cz') %>% add.rns() arrs %>% filter(pop > 500e3) %>% arrange(pop) %>% head(100) %>% pull(rn) arrs %>% filter(grepl('Portland', rn)) czsf <- build.CZs('20100') plcsf <- geox::places.wrapper(x = czsf) library(mapview) czsf %>% mapview() + mapview(plcsf, color = 'red') # for analysis, i want probably cz/cbsa (or maybe sometimes Place) # for visuals, i want region network cropped to bbox # for sample, i'll use Place smplc <- plcsf %>% filter(grepl('^Portland', name)) %>% st_transform(4326) smplc %>% mapview() # get block groups for place --------------------------------------------------- bgs <- geox::nbhds.from.sf(x = st_bbox(smplc) ,query.fcn = tigris::block_groups) bgs[3] %>% plot() # remove water areas (per census naming conventions) sbgs <- bgs %>% filter(substr(tractce,1,2) != '99') # subset to polygon, not bboxx sbgs <- st_make_valid(sbgs) sbgs <- geox::get.spatial.overlap(sbgs, smplc ,'geoid', 'placefp') # filter original census query to keep all columns sbgs <- bgs %>% filter(geoid %in% sbgs$geoid) sbgs['awater'] %>% plot() # just b/c for visuals, remove islands in ad hoc way sbgs <- sbgs %>% filter(tractce != '002400') bounds <- st_sf(geometry = st_union(sbgs)) # load safegraph for area ------------------------------------------------------ # function loads all CZs intersecting with area sfg <- sfx2sfg(bounds ,min.flows = 10 ,tracts.or.groups = 'bg' ,trim.loops = T ,year=2019) #trim to just place sfgt <- sfg %>% filter(across(c(origin, dest) , ~.x %in% sbgs$geoid)) # turn to (undirected) graph ugh <- sfg2gh(sfgt, directed = F) ugh ghsf <- spatialize.graph(ugh, frame.sf = NULL ,tracts.or.groups = 'bg' ,directed = F ,year = 2019) # transform ghsf <- st_transform(ghsf, 4326)
Summarise edges (trips and tie strength); visualize
ghsf sbgs # flow summaries ------------------------------------------------------------- edges <- ghsf %>% activate(edges) %>% as_tibble() tibble(edges)[Hmisc::Cs(n, tstr, dst)] %>% map(summary) tibble(edges)[Hmisc::Cs(n, tstr, dst)] %>% map( ~quantile(.x, seq(0,1,.1))) # trim by flow str ------------------------------------------------------------- bounds sbgs # skipped; sample area seems small enough trimmed.ghsf <- apply.flow.filters(ghsf ,frame.sf = NULL ,directed = F ,min.tie.str = NULL ,tie.str.deciles = 5 # filter out bottom x deciles ,min.flows = 10 ,max.dst = units::set_units(20, 'miles')) # map graph ------------------------------------------------------------ sbgs <- sbgs %>% st_transform(4326) # get background for mapping sttm <- visaux::get.stamen.bkg( sbgs , zoom = 12 ,maptype = 'toner-background' ) # lonlat matrix for notes lonlats <- ghsf %>% activate('nodes') %>% as_tibble() %>% st_sf() %>% st_coordinates() library(ggraph) ggmap(sttm , base_layer = ggraph(trimmed.ghsf # ghsf # , layout = lonlats ) ) + geom_edge_density(fill = '#00D0D0') + geom_edge_fan( aes( edge_alpha = tstr ,edge_width = tstr) ,color = '#008080' ) + flow.map.base() + visaux::bbox2ggcrop(bounds)
Can use pre-loaded divisions. I kind of like just using the function to re-generate because it makes the process more self-contained and easier to adjust.
I allocate neighborhoods (defined as block groups) to sides-of-divisons based on interstates and all urban arterials for this example.
# merge in n'hood level ERGM controls ------------------------------- # (re)generate nhood divisions ---------------------------------------------- #' i was previously working with NHPN data (national highway planning network) #' but it was often frustratingly messy. Switching to spatial census data, which is #' much better, but more complex #' Census codes for road features are referenced here: #' #' https://www2.census.gov/geo/pdfs/reference/mtfccs2019.pdf #' (See MTFCCS: S1100, S1200, S1400) sbgs rds <- tigris::roads(state = unique(sbgs$statefp) ,county = unique(sbgs$countyfp)) %>% rename_with(tolower) rds <- rds %>% st_transform(4326) rds <- st_crop(rds, smplc) # just interstates interstates <- rds %>% filter(rttyp %in% 'I') # all primary & secondary roads (limited-access and arterials) and hwy ramps arterials <- rds %>% filter(mtfcc %in% Hmisc::Cs(S1100, S1200, S1630)) # call divM function bounds$placefp <- smplc$placefp xnbd <- divM::gen.cross.tract.dividedness( bounds ,interstates ,nbd.query.fcn = tigris::block_groups ,region.id.colm = 'placefp' ,fill.nhpn.gaps = F # this can help in some cases and not others... nhpn data is not perfect ,erase.water = F ,year = 2019 ) %>% select(geoid, int.poly = poly.id) xnbd.a <- divM::gen.cross.tract.dividedness( bounds ,arterials ,nbd.query.fcn = tigris::block_groups ,region.id.colm = 'placefp' ,fill.nhpn.gaps = F # this can help in some cases and not others... nhpn data is not perfect ,erase.water = F ,year = 2019 ) %>% select(geoid, arterial.poly = poly.id) # merge in w/ nodes ghsf <- ghsf %>% activate('nodes') %>% left_join(xnbd , by = c('name' = 'geoid')) %>% left_join(xnbd.a , by = c('name' = 'geoid')) # vis ghsf %>% activate('nodes') %>% as_tibble() %>% st_sf() %>% mapview(zcol = 'int.poly') + mapview(interstates) + mapview(bounds) ghsf %>% activate('nodes') %>% as_tibble() %>% st_sf() %>% mapview(zcol = 'arterial.poly') + mapview(arterials) + mapview(bounds)
Keep simple for this example
# get other tract-level characteristics ---------------------------------------- demos <- sfg.seg::demos.2019 %>% select(1,2,matches('wh$|bl$|hsp$')) ghsf <- ghsf %>% activate('nodes') %>% left_join(demos , by = c('name' = 'geoid')) #region-level demos demos %>% select(matches('pop|^n')) %>% colSums() %>% format(big.mark = ',') # play with democgraphic flow visualization ------------------------------------ # could be better if not undirected dghsf <- ghsf %>% activate('edges') %>% mutate(flow.n.bl = n * (.N()$n.bl[from] + .N()$n.bl[to]) / 2 ,flow.n.hsp = n * (.N()$n.hsp[from] + .N()$n.hsp[to]) / 2 ,flow.n.wh = n * (.N()$n.wh[from] + .N()$n.wh[to]) / 2 ) # .....
set.seed(1234) # final setup for ergm --------------------------------------------------------- # ergm can't handle a lot of things: no factors, no geometry, no bundled units, no # NAs, etc.... # de-spatialize ugh <- ghsf %>% as_tbl_graph() %>% activate('edges') %>% select(-geometry) %>% activate('nodes') %>% select(-geometry) # remove units (should be meters) ugh <- ugh %>% activate('edges') %>% mutate(dst = as.numeric(dst)) # convert using statnet packages # hard to manipulate, but built for the ergm implementation devtools::load_all() library(statnet) eugh <- ergm.clean.n.convert(ugh) # run ergm --------------------------------------------------------------------- #install.packages("ergm.count") #install.packages("ergm.rank") #install.packages("latentnet") #update.packages() library(ergm) library(ergm.rank) library(latentnet) eugh fo.base <- formula( eugh ~ edges + nodematch("int.poly", diff = FALSE) + #nodematch("hwy.poly", diff = FALSE) + absdiff("perc.wh", pow=1) ) ugh # ergms take while to run. A good time to use Della ergm1 <- ergm(formula = fo.base ,"n" ,reference = ~Poisson) ergm1 %>% summary()
# generate spatial interaction fcn --------------------------------------------- # my little SIF tutorial: # https://rpubs.com/kmc39/sif sfn <- ghsf %>% activate('nodes') %>% as_tibble() sfe <- ghsf %>% activate('edges') %>% as_tibble() # turn distance into numerics representing km sfe <- sfe %>% mutate(dst = as.numeric(dst) / 1000 ) dst.mat <- build.pairwise.dst.matrix(sfn) dst.mat %>% head() # values are kilometers # binned relative frequencies (# of ties over distance, relative to crosswise # distance distribution) dst.freq <- relative.tie_dst.frequency( sfn, sfe , dst.mat = dst.mat , n.bins = 60 ) dst.freq dst.freq %>% ggplot(aes(x = dst.lvl, y = avg.flow)) + geom_path() # log-log'd dst.freq %>% ggplot(aes(x = log(dst.lvl), y = log(avg.flow))) + geom_path() # SIF typically shows decreasing flows with distance, until a certain point where the # relationship is lost. About 2km here. # use flow bins to parameterize a power law SIF # below relies on the `dst.freq` in the general environment param.fitting <- param.fitting <- stats::optim(par= c(1,1.7,1, 10) ,fn = function(x) power.law.loss.fcn(x, dst.freq) , hessian = T) # predicted tie probabilities, based on SIF dst.freq$flow.hat <- power.law(dst.freq$dst.lvl, param.fitting$par[1], param.fitting$par[2], param.fitting$par[3] ) dst.freq %>% select(dst.lvl, avg.flow, flow.hat) %>% pivot_longer(cols = contains("flow"), values_to = "avg.flow") %>% ggplot() + geom_path(aes(color = name, y = avg.flow, x = dst.lvl)) + ggtitle("Relative avg flows", "actual vs. fitted power law") decay.mat <- dst.mat %>% power.law(param.fitting$par[1], param.fitting$par[2], param.fitting$par[3])
# ERGM with sif, only interstates ---------------------------------------------------------------- fo.sif <- formula( eugh ~ edges + nodematch("int.poly", diff = FALSE) + #nodematch("hwy.poly", diff = FALSE) + absdiff("perc.wh", pow=1) + edgecov(decay.mat, "dst") ) eugh # ergms take while to run. A good time to use Della ergm2 <- ergm(formula = fo.sif ,"tstr" ,reference = ~Poisson) ergm2 %>% summary() # ERGM with sif with arterials ---------------------------------------------------------------- fo.sif <- formula( eugh ~ edges + nodematch("arterial.poly", diff = FALSE) + #nodematch("hwy.poly", diff = FALSE) + absdiff("perc.wh", pow=1) + edgecov(decay.mat, "dst") ) eugh # ergms take while to run. A good time to use Della ergm2 <- ergm(formula = fo.sif ,"tstr" ,reference = ~Poisson) ergm2 %>% summary()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.